#author: Danielle Remmerswaal
#last update: 18-01-2024

# 1. Participation and nonresponse ----------------------
#invitation letter
#stage 1 = installing app and registering in it
#stage 2 = logging at least 1 activity
#stage 3 = complete study task (keep app installed for study duration)
# Read Data/faseinfo23.csv and drop exact duplicate rows.
faseinfo23 <- distinct(read.csv(file = "Data/faseinfo23.csv"))
# Number of distinct user ids in faseinfo23 (315 participants registered).
length(unique(faseinfo23$user_id))
# Read the participation data (one `lengte` study-duration value and the
# stage1/stage2/stage3 indicators per row), again without duplicate rows.
Participation <- distinct(read.csv(file = "Data/paper1/participation.csv"))
# TABLE 2 #----------
# Participation per study duration (`lengte`): row-wise proportions and raw
# counts for every stage, each followed by Wilson confidence intervals.
# Counts and denominators are taken from `Participation` itself rather than
# typed in by hand, so the intervals cannot drift out of sync with the tables.
# Assumes stage1/stage2/stage3 are 0/1 indicators without missing values
# (they are averaged and summed as such in the bootstrap section) -- TODO
# confirm.

# Wilson confidence interval per study duration.
#   successes  named vector of success counts, one element per duration
#   trials     matching vector of denominators
# Returns the binconf() matrix (PointEst, Lower, Upper) with one row per
# duration, labelled with the names of `successes`.
wilson_by_duration <- function(successes, trials) {
  ci <- binconf(
    x = as.vector(successes),
    n = as.vector(trials),
    method = "wilson"
  )
  rownames(ci) <- names(successes)
  ci
}

# Rows per duration and number of rows reaching each stage.
n_invited <- tapply(Participation$stage1, Participation$lengte, length)
n_stage1 <- tapply(Participation$stage1, Participation$lengte, sum)
n_stage2 <- tapply(Participation$stage2, Participation$lengte, sum)
n_stage3 <- tapply(Participation$stage3, Participation$lengte, sum)

prop.table(table(Participation$lengte, Participation$stage1), 1)
prop.table(table(Participation$lengte, Participation$stage2), 1)
prop.table(table(Participation$lengte, Participation$stage3), 1)

table(Participation$lengte, Participation$stage1)
# Previously typed in as x = 139 and x = 176 with n = 1272.
wilson_by_duration(n_stage1, n_invited)

table(Participation$lengte, Participation$stage2)
# Previously typed in as x = 133 and x = 159 with n = 1272.
wilson_by_duration(n_stage2, n_invited)

table(Participation$lengte, Participation$stage3)
# Previously typed in as x = 94 and x = 92 with n = 1272.
wilson_by_duration(n_stage3, n_invited)

#rate of stage 2 conditional on 1
# Ratio of the stage counts only; dividing the full 2x2 tables would also
# produce a meaningless ratio of the non-participant counts.
n_stage2 / n_stage1
# Previously typed in as 133 of 139 and 159 of 176.
wilson_by_duration(n_stage2, n_stage1)

#rate of stage 3 conditional on 2
n_stage3 / n_stage2
# Previously typed in as 94 of 133 and 92 of 159.
wilson_by_duration(n_stage3, n_stage2)

#1A) significance tests -------
# Chi-square tests of whether reaching each participation stage is independent
# of study duration (`lengte`). chisq.test() is called with its defaults (for
# a 2x2 table this includes the continuity correction).
# Mosaic plot of the row-wise stage-1 proportions per study duration.
mosaicplot(prop.table(table(Participation$lengte, Participation$stage1), 1))
#chi-square
# Stage 1 by study duration, all rows of Participation.
chisq.test((table(Participation$lengte, Participation$stage1)))
# Stage 2 by study duration, all rows of Participation.
chisq.test((table(Participation$lengte, Participation$stage2)))
# Stage 2 by study duration, restricted to rows with stage1 == 1.
chisq.test(table(Participation$lengte[Participation$stage1==1], Participation$stage2[Participation$stage1==1]))
# Stage 3 by study duration, all rows of Participation.
chisq.test((table(Participation$lengte, Participation$stage3)))
# Stage 3 by study duration, restricted to rows with stage2 == 1.
# NOTE(review): the second table dimension is labelled "dropout" but holds the
# stage3 indicator itself (stage 3 = completing the study task), not its
# complement; the test statistic is unaffected by the label.
chisq.test((table(duration =Participation$lengte[Participation$stage2==1], dropout= Participation$stage3[Participation$stage2==1]))) #H1.3:dropout less in one-day

#1B) bootstrap -------
# Percentile bootstrap of the participation rates, run separately for each
# study duration. For every statistic the script prints the bootstrapped mean
# and the 2.5% / 97.5% quantiles, both expressed as percentages.

# Resample the rows of `data` with replacement and evaluate `statistic` on
# every resample.
#   data       data frame whose rows are resampled
#   statistic  function taking a resampled data frame, returning one number
#   n_boot     number of bootstrap replicates
#   seed       RNG seed, set before the first draw
# Returns a numeric vector of length `n_boot`.
# Every call re-seeds, so all statistics computed for the same data with the
# same seed are evaluated on the same sequence of resamples.
bootstrap_rate <- function(data, statistic, n_boot = 1000, seed = 2022) {
  set.seed(seed)
  n_rows <- nrow(data)
  vapply(
    seq_len(n_boot),
    function(i) {
      rows <- sample.int(n_rows, n_rows, replace = TRUE)
      statistic(data[rows, , drop = FALSE])
    },
    numeric(1)
  )
}

# Statistics evaluated on each resample.
# Unconditional rates: share of rows with the stage indicator set.
rate_stage1 <- function(d) mean(d$stage1)
rate_stage2 <- function(d) mean(d$stage2)
rate_stage3 <- function(d) mean(d$stage3)
# Conditional rates: rows reaching a stage relative to rows reaching the
# previous stage.
rate_stage2_given_1 <- function(d) sum(d$stage2) / sum(d$stage1)
rate_stage3_given_2 <- function(d) sum(d$stage3) / sum(d$stage2)

# Number of bootstrap replicates.
R <- 1000

#STUDYDURATION 1
Participation1 <- Participation %>% filter(lengte == 1)

#study duration 1 - stage 1
Participation_Boot <- bootstrap_rate(Participation1, rate_stage1, n_boot = R)
mean(Participation_Boot) * 100 # Bootstrapped mean
quantile(Participation_Boot, probs = c(0.025, 0.975)) * 100 # Bootstrapped CI

#study duration 1 - stage 2
Participation_Boot <- bootstrap_rate(Participation1, rate_stage2, n_boot = R)
mean(Participation_Boot) * 100 # Bootstrapped mean
quantile(Participation_Boot, probs = c(0.025, 0.975)) * 100 # Bootstrapped CI

#study duration 1 - stage 3
Participation_Boot <- bootstrap_rate(Participation1, rate_stage3, n_boot = R)
mean(Participation_Boot) * 100 # Bootstrapped mean
quantile(Participation_Boot, probs = c(0.025, 0.975)) * 100 # Bootstrapped CI

#study duration 1 - stage 2/1
Participation_Boot <- bootstrap_rate(
  Participation1, rate_stage2_given_1, n_boot = R
)
mean(Participation_Boot) * 100 # Bootstrapped mean
quantile(Participation_Boot, probs = c(0.025, 0.975)) * 100 # Bootstrapped CI

#study duration 1 - stage 3/2
Participation_Boot <- bootstrap_rate(
  Participation1, rate_stage3_given_2, n_boot = R
)
mean(Participation_Boot) * 100 # Bootstrapped mean
quantile(Participation_Boot, probs = c(0.025, 0.975)) * 100 # Bootstrapped CI

#STUDYDURATION 7
Participation7 <- Participation %>% filter(lengte == 7)

#study duration 7 - stage 1
Participation_Boot <- bootstrap_rate(Participation7, rate_stage1, n_boot = R)
mean(Participation_Boot) * 100 # Bootstrapped mean
quantile(Participation_Boot, probs = c(0.025, 0.975)) * 100 # Bootstrapped CI

#study duration 7 - stage 2
Participation_Boot <- bootstrap_rate(Participation7, rate_stage2, n_boot = R)
mean(Participation_Boot) * 100 # Bootstrapped mean
quantile(Participation_Boot, probs = c(0.025, 0.975)) * 100 # Bootstrapped CI

#study duration 7 - stage 3
Participation_Boot <- bootstrap_rate(Participation7, rate_stage3, n_boot = R)
mean(Participation_Boot) * 100 # Bootstrapped mean
quantile(Participation_Boot, probs = c(0.025, 0.975)) * 100 # Bootstrapped CI

#study duration 7 - stage 2/1
Participation_Boot <- bootstrap_rate(
  Participation7, rate_stage2_given_1, n_boot = R
)
mean(Participation_Boot) * 100 # Bootstrapped mean
quantile(Participation_Boot, probs = c(0.025, 0.975)) * 100 # Bootstrapped CI

#study duration 7 - stage 3/2
Participation_Boot <- bootstrap_rate(
  Participation7, rate_stage3_given_2, n_boot = R
)
mean(Participation_Boot) * 100 # Bootstrapped mean
quantile(Participation_Boot, probs = c(0.025, 0.975)) * 100 # Bootstrapped CI

